Segment Size
##Segment Size##
cat('Propotion of each segment: \n')
## Propotion of each segment:
print(table(myData$segment))
##
## 1 2 3 4 5 6 7
## 1082 543 488 496 1199 637 523
print(round(table(myData$segment)/nrow(myData),digit=3))
##
## 1 2 3 4 5 6 7
## 0.218 0.109 0.098 0.100 0.241 0.128 0.105
seg1
########
##seg1##
########
seg1rate <- nrow(seg1)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg1)){
if(length(table(seg1[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg1[i])/table(myData[i]))/seg1rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg1 compare to total: \n')
## seg1 compare to total:
## $jan
##
## 0 1
## 1.025 0.837
##
## $feb
##
## 0 1
## 1.025 0.852
##
## $mar
##
## 0 1
## 1.024 0.880
##
## $apr
##
## 0 1
## 1.036 0.805
##
## $may
##
## 0 1
## 1.021 0.902
##
## $jun
##
## 0 1
## 1.021 0.881
##
## $jul
##
## 0 1
## 0.980 1.108
##
## $aug
##
## 0 1
## 0.970 1.097
##
## $sep
##
## 0 1
## 0.980 1.101
##
## $oct
##
## 0 1
## 1.014 0.889
##
## $nov
##
## 0 1
## 1.002 0.975
##
## $dec
##
## 0 1
## 1.011 0.904
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.849 1.179 1.216 1.246 0.790 0.927 0.783 0.944 0.873 0.889 1.007 0.955
## 12 13 14 15 16 17 18 19 20 21
## 1.099 1.198 1.069 1.168 0.852 1.100 0.723 0.620 0.889 0.758
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.148 1.424 1.505 1.530 0.782 1.113 0.918 1.023 1.216 0.795 1.030 0.956
## 12 13 14 15 16 17 18 19 20 21
## 1.078 1.082 0.985 1.178 0.944 0.877 0.760 0.840 0.784 0.770
##
## $retained_flag
##
## 0 1
## 1.052 0.910
##
## $ever_responded
##
## 0 1
## 1.05 0.90
##
## $man_dept_buy
##
## 0 1
## 1.022 0.919
##
## $womens_dept_buy
##
## 0 1
## 1.020 0.982
##
## $kids_dept_buy
##
## 0 1
## 1.001 0.993
##
## $athletic_dept_buy
##
## 0 1
## 0.962 1.026
##
## $accessories_dept_buy
##
## 0 1
## 1.003 0.993
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jan.1 feb.1 mar.1 apr.1 jun.1 jul.1
## -0.163 -0.148 -0.120 -0.195 -0.119 0.108
## sep.1 oct.1 recency.0 recency.1 recency.2 recency.3
## 0.101 -0.111 -0.151 0.179 0.216 0.246
## recency.4 recency.6 recency.8 recency.9 recency.13 recency.15
## -0.210 -0.217 -0.127 -0.111 0.198 0.168
## recency.16 recency.18 recency.19 recency.20 recency.21 tenure.0
## -0.148 -0.277 -0.380 -0.111 -0.242 0.148
## tenure.1 tenure.2 tenure.3 tenure.4 tenure.5 tenure.8
## 0.424 0.505 0.530 -0.218 0.113 0.216
## tenure.9 tenure.15 tenure.17 tenure.18 tenure.19 tenure.20
## -0.205 0.178 -0.123 -0.240 -0.160 -0.216
## tenure.21
## -0.230
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg1 / Mean of total:\n')
## Mean of Seg1 / Mean of total:
mean_table <- round(colMeans(seg1)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 1.058 1.023 0.837
## feb mar apr
## 0.852 0.880 0.805
## may jun jul
## 0.902 0.881 1.108
## aug sep oct
## 1.097 1.101 0.889
## nov dec total_spend
## 0.975 0.904 0.994
## mens_dept_spend womens_dept_spend kids_dept_spend
## 0.910 0.980 0.974
## athletic_dept_spend accessories_spend recency
## 1.021 0.999 0.950
## response total_txns total_items
## 0.847 0.947 0.970
## unique_sizes unique_depts internet_spend
## 0.993 0.994 0.626
## tenure retained_flag retained_spend
## 0.922 0.910 0.900
## cmpns pct_response ever_responded
## 0.878 0.939 0.900
## opens clicks hhincome
## 0.990 1.012 0.995
## hhage hhwom hhmen
## 0.988 0.984 0.996
## hhkids man_dept_buy womens_dept_buy
## 0.991 0.919 0.982
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 0.993 1.026 0.993
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jan feb mar apr jun
## -0.163 -0.148 -0.120 -0.195 -0.119
## jul sep oct response internet_spend
## 0.108 0.101 -0.111 -0.153 -0.374
## cmpns
## -0.122
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg2
########
##seg2##
########
seg2rate <- nrow(seg2)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg2)){
if(length(table(seg2[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg2[i])/table(myData[i]))/seg2rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg2 compare to total: \n')
## seg2 compare to total:
## $jan
##
## 0 1
## 1.010 0.937
##
## $feb
##
## 0 1
## 1.003 0.985
##
## $mar
##
## 0 1
## 1.008 0.959
##
## $apr
##
## 0 1
## 1.015 0.920
##
## $may
##
## 0 1
## 0.995 1.022
##
## $jun
##
## 0 1
## 1.029 0.834
##
## $jul
##
## 0 1
## 1.005 0.975
##
## $aug
##
## 0 1
## 1.000 0.999
##
## $sep
##
## 0 1
## 1.021 0.893
##
## $oct
##
## 0 1
## 0.981 1.144
##
## $nov
##
## 0 1
## 0.994 1.073
##
## $dec
##
## 0 1
## 0.978 1.195
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.327 0.917 0.956 1.039 0.878 1.175 0.993 1.070 1.243 1.082 1.158 1.111
## 12 13 14 15 16 17 18 19 20 21
## 1.123 0.845 1.025 1.029 0.548 0.822 0.864 1.179 0.738 0.504
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.083 1.158 1.139 1.298 0.973 0.832 0.798 1.233 1.515 1.380 1.411 1.161
## 12 13 14 15 16 17 18 19 20 21
## 1.214 1.056 1.045 0.884 0.759 0.829 0.742 0.889 0.753 0.782
##
## $retained_flag
##
## 0 1
## 1.013 0.977
##
## $ever_responded
##
## 0 1
## 1.006 0.987
##
## $man_dept_buy
##
## 0 1
## 0.947 1.193
##
## $womens_dept_buy
##
## 0 1
## 1.011 0.990
##
## $kids_dept_buy
##
## 0 1
## 1.021 0.901
##
## $athletic_dept_buy
##
## 0 1
## 1.034 0.977
##
## $accessories_dept_buy
##
## 0 1
## 1.021 0.954
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jun.1 sep.1 oct.1 dec.1 recency.0
## -0.166 -0.107 0.144 0.195 0.327
## recency.4 recency.5 recency.8 recency.10 recency.11
## -0.122 0.175 0.243 0.158 0.111
## recency.12 recency.13 recency.16 recency.17 recency.18
## 0.123 -0.155 -0.452 -0.178 -0.136
## recency.19 recency.20 recency.21 tenure.1 tenure.2
## 0.179 -0.262 -0.496 0.158 0.139
## tenure.3 tenure.5 tenure.6 tenure.7 tenure.8
## 0.298 -0.168 -0.202 0.233 0.515
## tenure.9 tenure.10 tenure.11 tenure.12 tenure.15
## 0.380 0.411 0.161 0.214 -0.116
## tenure.16 tenure.17 tenure.18 tenure.19 tenure.20
## -0.241 -0.171 -0.258 -0.111 -0.247
## tenure.21 man_dept_buy.1
## -0.218 0.193
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg2 / Mean of total:\n')
## Mean of Seg2 / Mean of total:
mean_table <- round(colMeans(seg2)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 1.016 1.001 0.937
## feb mar apr
## 0.985 0.959 0.920
## may jun jul
## 1.022 0.834 0.975
## aug sep oct
## 0.999 0.893 1.144
## nov dec total_spend
## 1.073 1.195 0.993
## mens_dept_spend womens_dept_spend kids_dept_spend
## 1.133 1.053 0.850
## athletic_dept_spend accessories_spend recency
## 0.945 0.939 0.954
## response total_txns total_items
## 0.911 0.977 0.989
## unique_sizes unique_depts internet_spend
## 0.995 0.992 1.749
## tenure retained_flag retained_spend
## 0.933 0.977 0.942
## cmpns pct_response ever_responded
## 0.925 1.028 0.987
## opens clicks hhincome
## 0.998 1.002 0.989
## hhage hhwom hhmen
## 1.000 0.999 0.995
## hhkids man_dept_buy womens_dept_buy
## 0.976 1.193 0.990
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 0.901 0.977 0.954
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jun sep oct dec
## -0.166 -0.107 0.144 0.195
## mens_dept_spend kids_dept_spend internet_spend man_dept_buy
## 0.133 -0.150 0.749 0.193
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg3
########
##seg3##
########
seg3rate <- nrow(seg3)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg3)){
if(length(table(seg3[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg3[i])/table(myData[i]))/seg3rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg3 compare to total: \n')
## seg3 compare to total:
## $jan
##
## 0 1
## 1.045 0.705
##
## $feb
##
## 0 1
## 1.034 0.793
##
## $mar
##
## 0 1
## 1.028 0.859
##
## $apr
##
## 0 1
## 1.003 0.984
##
## $may
##
## 0 1
## 1.018 0.919
##
## $jun
##
## 0 1
## 1.008 0.956
##
## $jul
##
## 0 1
## 1.021 0.889
##
## $aug
##
## 0 1
## 1.022 0.929
##
## $sep
##
## 0 1
## 1.001 0.994
##
## $oct
##
## 0 1
## 0.986 1.111
##
## $nov
##
## 0 1
## 1.003 0.967
##
## $dec
##
## 0 1
## 1.008 0.933
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.069 1.070 0.851 0.963 0.977 1.214 1.144 0.780 0.885 0.657 0.859 0.942
## 12 13 14 15 16 17 18 19 20 21
## 1.062 1.383 1.097 1.084 0.914 0.732 1.122 1.124 1.231 1.121
##
## $unique_depts
##
## 1 2 3 4 5 6 7
## 1.087 0.992 0.901 0.844 0.932 0.885 2.545
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.804 1.224 1.352 0.794 0.794 1.028 1.305 0.791 0.674 0.398 1.380 1.137
## 12 13 14 15 16 17 18 19 20 21
## 1.195 1.224 1.078 0.984 1.207 0.791 0.791 1.076 0.931 0.805
##
## $retained_flag
##
## 0 1
## 1.076 0.869
##
## $ever_responded
##
## 0 1
## 1.070 0.861
##
## $man_dept_buy
##
## 0 1
## 0.991 1.033
##
## $womens_dept_buy
##
## 0 1
## 1.081 0.926
##
## $kids_dept_buy
##
## 0 1
## 0.979 1.095
##
## $athletic_dept_buy
##
## 0 1
## 1.125 0.915
##
## $accessories_dept_buy
##
## 0 1
## 1.031 0.931
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jan.1 feb.1 mar.1
## -0.295 -0.207 -0.141
## jul.1 oct.1 recency.2
## -0.111 0.111 -0.149
## recency.5 recency.6 recency.7
## 0.214 0.144 -0.220
## recency.8 recency.9 recency.10
## -0.115 -0.343 -0.141
## recency.13 recency.17 recency.18
## 0.383 -0.268 0.122
## recency.19 recency.20 recency.21
## 0.124 0.231 0.121
## unique_depts.4 unique_depts.6 unique_depts.7
## -0.156 -0.115 1.545
## tenure.0 tenure.1 tenure.2
## -0.196 0.224 0.352
## tenure.3 tenure.4 tenure.6
## -0.206 -0.206 0.305
## tenure.7 tenure.8 tenure.9
## -0.209 -0.326 -0.602
## tenure.10 tenure.11 tenure.12
## 0.380 0.137 0.195
## tenure.13 tenure.16 tenure.17
## 0.224 0.207 -0.209
## tenure.18 tenure.21 retained_flag.1
## -0.209 -0.195 -0.131
## ever_responded.1 athletic_dept_buy.0
## -0.139 0.125
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg3 / Mean of total:\n')
## Mean of Seg3 / Mean of total:
mean_table <- round(colMeans(seg3)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 0.967 0.959 0.705
## feb mar apr
## 0.793 0.859 0.984
## may jun jul
## 0.919 0.956 0.889
## aug sep oct
## 0.929 0.994 1.111
## nov dec total_spend
## 0.967 0.933 0.902
## mens_dept_spend womens_dept_spend kids_dept_spend
## 1.158 0.883 1.087
## athletic_dept_spend accessories_spend recency
## 0.816 1.004 1.021
## response total_txns total_items
## 0.719 0.931 0.944
## unique_sizes unique_depts internet_spend
## 0.952 0.961 0.718
## tenure retained_flag retained_spend
## 0.977 0.869 0.866
## cmpns pct_response ever_responded
## 0.883 0.804 0.861
## opens clicks hhincome
## 0.999 1.008 0.993
## hhage hhwom hhmen
## 0.995 0.975 0.999
## hhkids man_dept_buy womens_dept_buy
## 1.002 1.033 0.926
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 1.095 0.915 0.931
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jan feb mar
## -0.295 -0.207 -0.141
## jul oct mens_dept_spend
## -0.111 0.111 0.158
## womens_dept_spend athletic_dept_spend response
## -0.117 -0.184 -0.281
## internet_spend retained_flag retained_spend
## -0.282 -0.131 -0.134
## cmpns pct_response ever_responded
## -0.117 -0.196 -0.139
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg4
########
##seg4##
########
seg4rate <- nrow(seg4)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg4)){
if(length(table(seg4[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg4[i])/table(myData[i]))/seg4rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg4 compare to total: \n')
## seg4 compare to total:
## $jan
##
## 0 1
## 1.026 0.830
##
## $feb
##
## 0 1
## 1.048 0.709
##
## $mar
##
## 0 1
## 1.051 0.748
##
## $apr
##
## 0 1
## 0.999 1.007
##
## $may
##
## 0 1
## 0.974 1.119
##
## $jun
##
## 0 1
## 0.932 1.390
##
## $jul
##
## 0 1
## 0.980 1.106
##
## $aug
##
## 0 1
## 1.048 0.846
##
## $sep
##
## 0 1
## 1.052 0.730
##
## $oct
##
## 0 1
## 1.036 0.723
##
## $nov
##
## 0 1
## 1.023 0.699
##
## $dec
##
## 0 1
## 1.023 0.801
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.952 0.514 0.681 1.011 1.683 1.317 1.165 0.687 0.762 0.915 0.718 1.042
## 12 13 14 15 16 17 18 19 20 21
## 0.676 0.817 1.166 1.245 1.799 1.739 1.262 0.983 1.050 0.827
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.659 0.317 0.457 0.852 2.202 1.821 1.541 0.778 0.597 0.560 0.702 0.763
## 12 13 14 15 16 17 18 19 20 21
## 0.818 0.867 1.228 1.340 1.504 1.232 1.049 0.744 0.855 0.856
##
## $retained_flag
##
## 0 1
## 0.998 1.004
##
## $ever_responded
##
## 0 1
## 1.059 0.883
##
## $man_dept_buy
##
## 0 1
## 1.003 0.989
##
## $womens_dept_buy
##
## 0 1
## 1.060 0.946
##
## $kids_dept_buy
##
## 0 1
## 1.000 0.998
##
## $athletic_dept_buy
##
## 0 1
## 0.972 1.019
##
## $accessories_dept_buy
##
## 0 1
## 1.10 0.78
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jan.1 feb.1 mar.1
## -0.170 -0.291 -0.252
## may.1 jun.1 jul.1
## 0.119 0.390 0.106
## aug.1 sep.1 oct.1
## -0.154 -0.270 -0.277
## nov.1 dec.1 recency.1
## -0.301 -0.199 -0.486
## recency.2 recency.4 recency.5
## -0.319 0.683 0.317
## recency.6 recency.7 recency.8
## 0.165 -0.313 -0.238
## recency.10 recency.12 recency.13
## -0.282 -0.324 -0.183
## recency.14 recency.15 recency.16
## 0.166 0.245 0.799
## recency.17 recency.18 recency.21
## 0.739 0.262 -0.173
## tenure.0 tenure.1 tenure.2
## -0.341 -0.683 -0.543
## tenure.3 tenure.4 tenure.5
## -0.148 1.202 0.821
## tenure.6 tenure.7 tenure.8
## 0.541 -0.222 -0.403
## tenure.9 tenure.10 tenure.11
## -0.440 -0.298 -0.237
## tenure.12 tenure.13 tenure.14
## -0.182 -0.133 0.228
## tenure.15 tenure.16 tenure.17
## 0.340 0.504 0.232
## tenure.19 tenure.20 tenure.21
## -0.256 -0.145 -0.144
## ever_responded.1 accessories_dept_buy.1
## -0.117 -0.220
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg4 / Mean of total:\n')
## Mean of Seg4 / Mean of total:
mean_table <- round(colMeans(seg4)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 0.974 0.995 0.830
## feb mar apr
## 0.709 0.748 1.007
## may jun jul
## 1.119 1.390 1.106
## aug sep oct
## 0.846 0.730 0.723
## nov dec total_spend
## 0.699 0.801 0.903
## mens_dept_spend womens_dept_spend kids_dept_spend
## 0.969 0.836 0.905
## athletic_dept_spend accessories_spend recency
## 0.939 0.732 1.093
## response total_txns total_items
## 0.815 0.911 0.919
## unique_sizes unique_depts internet_spend
## 0.951 0.956 1.709
## tenure retained_flag retained_spend
## 1.011 1.004 1.009
## cmpns pct_response ever_responded
## 0.956 0.853 0.883
## opens clicks hhincome
## 1.020 1.021 0.963
## hhage hhwom hhmen
## 0.975 0.978 0.964
## hhkids man_dept_buy womens_dept_buy
## 0.926 0.989 0.946
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 0.998 1.019 0.780
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jan feb mar
## -0.170 -0.291 -0.252
## may jun jul
## 0.119 0.390 0.106
## aug sep oct
## -0.154 -0.270 -0.277
## nov dec womens_dept_spend
## -0.301 -0.199 -0.164
## accessories_spend response internet_spend
## -0.268 -0.185 0.709
## pct_response ever_responded accessories_dept_buy
## -0.147 -0.117 -0.220
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg5
########
##seg5##
########
seg5rate <- nrow(seg5)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg5)){
if(length(table(seg5[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg5[i])/table(myData[i]))/seg5rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg5 compare to total: \n')
## seg5 compare to total:
## $jan
##
## 0 1
## 0.994 1.042
##
## $feb
##
## 0 1
## 1.008 0.951
##
## $mar
##
## 0 1
## 0.999 1.003
##
## $apr
##
## 0 1
## 0.995 1.025
##
## $may
##
## 0 1
## 1.011 0.949
##
## $jun
##
## 0 1
## 1.020 0.885
##
## $jul
##
## 0 1
## 1.013 0.931
##
## $aug
##
## 0 1
## 1.025 0.919
##
## $sep
##
## 0 1
## 1.022 0.886
##
## $oct
##
## 0 1
## 1.000 0.999
##
## $nov
##
## 0 1
## 0.995 1.065
##
## $dec
##
## 0 1
## 1.009 0.921
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.243 0.902 1.003 0.915 1.011 0.976 1.269 1.203 0.923 1.047 0.979 1.054
## 12 13 14 15 16 17 18 19 20 21
## 1.042 0.766 0.911 0.907 1.092 0.769 0.914 1.042 0.735 1.330
##
## $unique_depts
##
## 1 2 3 4 5 6 7
## 1.035 1.075 0.960 0.738 0.759 0.540 2.072
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.745 1.128 1.083 0.911 1.117 1.004 1.169 1.224 0.960 1.181 1.065 1.136
## 12 13 14 15 16 17 18 19 20 21
## 0.909 0.876 0.773 0.924 0.835 1.046 1.008 0.900 0.846 1.036
##
## $retained_flag
##
## 0 1
## 1.011 0.981
##
## $ever_responded
##
## 0 1
## 1.060 0.882
##
## $opens
##
## 0.04 0.041 0.042 0.043 0.044 0.045 0.046 0.047 0.048 0.049 0.05 0.051
## 0.921 0.444 0.622 1.015 0.761 0.761 0.625 0.712 0.857 0.869 1.013 1.228
## 0.052 0.053 0.054 0.055 0.056 0.057 0.058 0.059 0.06 0.061 0.062 0.063
## 1.434 1.076 1.036 1.058 0.921 1.059 1.064 1.322 0.849 0.885 1.007 0.921
## 0.064 0.065 0.066 0.067 0.068 0.069 0.07 0.071 0.072 0.073 0.074 0.075
## 0.853 0.938 1.188 0.998 1.120 1.120 0.862 0.893 1.021 1.059 1.071 1.395
## 0.076 0.077 0.078 0.079 0.08 0.081 0.082 0.083 0.084 0.085 0.086 0.087
## 0.518 0.345 0.863 1.776 0.863 0.812 1.130 0.921 0.637 1.657 1.209 0.518
## 0.088 0.089 0.09 0.091 0.092 0.093 0.094 0.095 0.096 0.097 0.098 0.099
## 1.036 1.105 0.829 1.776 0.637 0.777 1.462 1.450 0.296 1.130 0.296 1.184
## 0.1 0.101 0.102 0.103 0.104 0.105
## 0.829 0.592 1.706 0.888 0.956 1.657
##
## $man_dept_buy
##
## 0 1
## 1.014 0.949
##
## $womens_dept_buy
##
## 0 1
## 1.014 0.988
##
## $kids_dept_buy
##
## 0 1
## 1.023 0.892
##
## $athletic_dept_buy
##
## 0 1
## 1.062 0.958
##
## $accessories_dept_buy
##
## 0 1
## 1.009 0.979
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jun.1 sep.1 recency.0 recency.6
## -0.115 -0.114 0.243 0.269
## recency.7 recency.13 recency.17 recency.20
## 0.203 -0.234 -0.231 -0.265
## recency.21 unique_depts.4 unique_depts.5 unique_depts.6
## 0.330 -0.262 -0.241 -0.460
## unique_depts.7 tenure.0 tenure.1 tenure.4
## 1.072 0.745 0.128 0.117
## tenure.6 tenure.7 tenure.9 tenure.11
## 0.169 0.224 0.181 0.136
## tenure.13 tenure.14 tenure.16 tenure.20
## -0.124 -0.227 -0.165 -0.154
## ever_responded.1 opens.0.041 opens.0.042 opens.0.044
## -0.118 -0.556 -0.378 -0.239
## opens.0.045 opens.0.046 opens.0.047 opens.0.048
## -0.239 -0.375 -0.288 -0.143
## opens.0.049 opens.0.051 opens.0.052 opens.0.059
## -0.131 0.228 0.434 0.322
## opens.0.06 opens.0.061 opens.0.064 opens.0.066
## -0.151 -0.115 -0.147 0.188
## opens.0.068 opens.0.069 opens.0.07 opens.0.071
## 0.120 0.120 -0.138 -0.107
## opens.0.075 opens.0.076 opens.0.077 opens.0.078
## 0.395 -0.482 -0.655 -0.137
## opens.0.079 opens.0.08 opens.0.081 opens.0.082
## 0.776 -0.137 -0.188 0.130
## opens.0.084 opens.0.085 opens.0.086 opens.0.087
## -0.363 0.657 0.209 -0.482
## opens.0.089 opens.0.09 opens.0.091 opens.0.092
## 0.105 -0.171 0.776 -0.363
## opens.0.093 opens.0.094 opens.0.095 opens.0.096
## -0.223 0.462 0.450 -0.704
## opens.0.097 opens.0.098 opens.0.099 opens.0.1
## 0.130 -0.704 0.184 -0.171
## opens.0.101 opens.0.102 opens.0.103 opens.0.105
## -0.408 0.706 -0.112 0.657
## kids_dept_buy.1
## -0.108
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg5 / Mean of total:\n')
## Mean of Seg5 / Mean of total:
mean_table <- round(colMeans(seg5)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 0.987 1.016 1.042
## feb mar apr
## 0.951 1.003 1.025
## may jun jul
## 0.949 0.885 0.931
## aug sep oct
## 0.919 0.886 0.999
## nov dec total_spend
## 1.065 0.921 0.918
## mens_dept_spend womens_dept_spend kids_dept_spend
## 0.934 0.934 0.888
## athletic_dept_spend accessories_spend recency
## 0.910 0.906 0.982
## response total_txns total_items
## 0.825 0.931 0.897
## unique_sizes unique_depts internet_spend
## 0.947 0.957 0.955
## tenure retained_flag retained_spend
## 0.958 0.981 0.949
## cmpns pct_response ever_responded
## 0.918 0.884 0.882
## opens clicks hhincome
## 1.009 1.013 0.981
## hhage hhwom hhmen
## 0.978 0.979 0.978
## hhkids man_dept_buy womens_dept_buy
## 0.954 0.949 0.988
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 0.892 0.958 0.979
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jun sep kids_dept_spend response
## -0.115 -0.114 -0.112 -0.175
## total_items pct_response ever_responded kids_dept_buy
## -0.103 -0.116 -0.118 -0.108
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg6
########
##seg6##
########
seg6rate <- nrow(seg6)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg6)){
if(length(table(seg6[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg6[i])/table(myData[i]))/seg6rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg6 compare to total: \n')
## seg6 compare to total:
## $jan
##
## 0 1
## 0.959 1.269
##
## $feb
##
## 0 1
## 0.968 1.193
##
## $mar
##
## 0 1
## 0.933 1.334
##
## $apr
##
## 0 1
## 0.928 1.387
##
## $may
##
## 0 1
## 0.948 1.241
##
## $jun
##
## 0 1
## 0.960 1.231
##
## $jul
##
## 0 1
## 0.976 1.131
##
## $aug
##
## 0 1
## 0.931 1.224
##
## $sep
##
## 0 1
## 0.932 1.350
##
## $oct
##
## 0 1
## 0.966 1.263
##
## $nov
##
## 0 1
## 0.981 1.242
##
## $dec
##
## 0 1
## 0.952 1.414
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 1.131 1.354 1.060 0.886 0.811 0.763 0.725 1.006 0.763 1.006 1.283 0.992
## 12 13 14 15 16 17 18 19 20 21
## 0.957 1.144 1.008 0.877 1.027 0.841 1.228 1.053 1.006 0.859
##
## $total_txns
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 0.744 0.917 1.263 1.160 1.904 1.773 1.902 1.779 2.311 2.753 1.418 1.950
## 13 14 15
## 3.900 5.199 3.900
##
## $unique_depts
##
## 1 2 3 4 5 6 7
## 0.781 0.924 1.327 1.408 1.538 1.356 1.950
##
## $tenure
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.513 0.889 0.647 0.830 0.387 0.473 0.560 0.566 0.465 0.871 0.620 0.831
## 12 13 14 15 16 17 18 19 20 21
## 0.756 0.862 1.195 0.928 1.110 1.161 1.792 1.515 1.403 1.456
##
## $retained_flag
##
## 0 1
## 0.836 1.284
##
## $cmpns
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.580 0.544 0.765 0.998 0.820 0.859 1.020 0.936 1.287 1.300 1.108 1.444
## 12 13 14 15 16 17 18 19 20 21 22 23
## 1.766 2.112 2.885 1.800 2.304 3.200 2.925 2.482 4.129 1.300 2.600 5.199
## 24
## 7.799
##
## $ever_responded
##
## 0 1
## 0.735 1.524
##
## $man_dept_buy
##
## 0 1
## 0.977 1.082
##
## $womens_dept_buy
##
## 0 1
## 0.878 1.111
##
## $kids_dept_buy
##
## 0 1
## 0.960 1.184
##
## $athletic_dept_buy
##
## 0 1
## 0.905 1.065
##
## $accessories_dept_buy
##
## 0 1
## 0.879 1.266
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jan.1 feb.1 mar.1
## 0.269 0.193 0.334
## apr.1 may.1 jun.1
## 0.387 0.241 0.231
## jul.1 aug.1 sep.1
## 0.131 0.224 0.350
## oct.1 nov.1 dec.1
## 0.263 0.242 0.414
## recency.0 recency.1 recency.3
## 0.131 0.354 -0.114
## recency.4 recency.5 recency.6
## -0.189 -0.237 -0.275
## recency.8 recency.10 recency.13
## -0.237 0.283 0.144
## recency.15 recency.17 recency.18
## -0.123 -0.159 0.228
## recency.21 total_txns.1 total_txns.3
## -0.141 -0.256 0.263
## total_txns.4 total_txns.5 total_txns.6
## 0.160 0.904 0.773
## total_txns.7 total_txns.8 total_txns.9
## 0.902 0.779 1.311
## total_txns.10 total_txns.11 total_txns.12
## 1.753 0.418 0.950
## total_txns.13 total_txns.14 total_txns.15
## 2.900 4.199 2.900
## unique_depts.1 unique_depts.3 unique_depts.4
## -0.219 0.327 0.408
## unique_depts.5 unique_depts.6 unique_depts.7
## 0.538 0.356 0.950
## tenure.0 tenure.1 tenure.2
## -0.487 -0.111 -0.353
## tenure.3 tenure.4 tenure.5
## -0.170 -0.613 -0.527
## tenure.6 tenure.7 tenure.8
## -0.440 -0.434 -0.535
## tenure.9 tenure.10 tenure.11
## -0.129 -0.380 -0.169
## tenure.12 tenure.13 tenure.14
## -0.244 -0.138 0.195
## tenure.16 tenure.17 tenure.18
## 0.110 0.161 0.792
## tenure.19 tenure.20 tenure.21
## 0.515 0.403 0.456
## retained_flag.0 retained_flag.1 cmpns.0
## -0.164 0.284 -0.420
## cmpns.1 cmpns.2 cmpns.4
## -0.456 -0.235 -0.180
## cmpns.5 cmpns.8 cmpns.9
## -0.141 0.287 0.300
## cmpns.10 cmpns.11 cmpns.12
## 0.108 0.444 0.766
## cmpns.13 cmpns.14 cmpns.15
## 1.112 1.885 0.800
## cmpns.16 cmpns.17 cmpns.18
## 1.304 2.200 1.925
## cmpns.19 cmpns.20 cmpns.21
## 1.482 3.129 0.300
## cmpns.22 cmpns.23 cmpns.24
## 1.600 4.199 6.799
## ever_responded.0 ever_responded.1 womens_dept_buy.0
## -0.265 0.524 -0.122
## womens_dept_buy.1 kids_dept_buy.1 accessories_dept_buy.0
## 0.111 0.184 -0.121
## accessories_dept_buy.1
## 0.266
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
##Plot the tenure abnormality##
################################
#since the tenure is fluturated, I plot it
tenure_abnorm <- data.frame(time=seq(0,21),(abnorm=unlist(compare_rate['tenure'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='tenure fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg6 / Mean of total:\n')
## Mean of Seg6 / Mean of total:
mean_table<- round(colMeans(myData)/colMeans(seg6),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 1.031 1.052 0.788
## feb mar apr
## 0.838 0.749 0.721
## may jun jul
## 0.806 0.812 0.884
## aug sep oct
## 0.817 0.741 0.792
## nov dec total_spend
## 0.805 0.707 0.784
## mens_dept_spend womens_dept_spend kids_dept_spend
## 0.892 0.764 0.800
## athletic_dept_spend accessories_spend recency
## 0.776 0.714 1.014
## response total_txns total_items
## 0.516 0.766 0.757
## unique_sizes unique_depts internet_spend
## 0.865 0.884 0.752
## tenure retained_flag retained_spend
## 0.858 0.779 0.734
## cmpns pct_response ever_responded
## 0.680 0.664 0.656
## opens clicks hhincome
## 1.014 1.060 0.926
## hhage hhwom hhmen
## 0.928 0.921 0.931
## hhkids man_dept_buy womens_dept_buy
## 0.853 0.924 0.900
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 0.845 0.939 0.790
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jan feb mar
## -0.212 -0.162 -0.251
## apr may jun
## -0.279 -0.194 -0.188
## jul aug sep
## -0.116 -0.183 -0.259
## oct nov dec
## -0.208 -0.195 -0.293
## total_spend mens_dept_spend womens_dept_spend
## -0.216 -0.108 -0.236
## kids_dept_spend athletic_dept_spend accessories_spend
## -0.200 -0.224 -0.286
## response total_txns total_items
## -0.484 -0.234 -0.243
## unique_sizes unique_depts internet_spend
## -0.135 -0.116 -0.248
## tenure retained_flag retained_spend
## -0.142 -0.221 -0.266
## cmpns pct_response ever_responded
## -0.320 -0.336 -0.344
## hhkids kids_dept_buy accessories_dept_buy
## -0.147 -0.155 -0.210
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

seg7
########
##seg7##
########
seg7rate <- nrow(seg7)/nrow(myData)
##Run a roop to compare rate( attribute rate / ovservation rate)##
compare_rate <- list()
need_mannual <- c()
for(i in colnames(seg7)){
if(length(table(seg7[,i]))==length(table(myData[,i]))){
compare_rate[i]<-list(value=(round((table(seg7[i])/table(myData[i]))/seg7rate,digit=3)))}
else{
need_mannual <- append(need_mannual,i)
}
}
cat('seg7 compare to total: \n')
## seg7 compare to total:
## $jan
##
## 0 1
## 0.936 1.416
##
## $feb
##
## 0 1
## 0.889 1.668
##
## $mar
##
## 0 1
## 0.950 1.247
##
## $apr
##
## 0 1
## 1.006 0.967
##
## $may
##
## 0 1
## 1.008 0.965
##
## $jun
##
## 0 1
## 0.987 1.073
##
## $jul
##
## 0 1
## 1.036 0.805
##
## $aug
##
## 0 1
## 1.023 0.924
##
## $sep
##
## 0 1
## 1.000 0.998
##
## $oct
##
## 0 1
## 1.01 0.92
##
## $nov
##
## 0 1
## 1.012 0.849
##
## $dec
##
## 0 1
## 1.009 0.926
##
## $recency
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 0.237 0.906 0.960 0.809 1.140 0.813 0.884 1.072 1.807 1.430 0.922 0.879
## 12 13 14 15 16 17 18 19 20 21
## 0.874 0.929 0.778 0.674 0.853 1.251 1.272 1.340 1.839 1.482
##
## $retained_flag
##
## 0 1
## 0.985 1.025
##
## $ever_responded
##
## 0 1
## 0.953 1.093
##
## $man_dept_buy
##
## 0 1
## 1.010 0.964
##
## $womens_dept_buy
##
## 0 1
## 0.933 1.061
##
## $kids_dept_buy
##
## 0 1
## 0.988 1.054
##
## $athletic_dept_buy
##
## 0 1
## 0.927 1.050
##
## $accessories_dept_buy
##
## 0 1
## 0.974 1.058
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
print(unlist(compare_rate)[unlist(compare_rate)>1.1|unlist(compare_rate)<0.9]-1)
## jan.1 feb.0 feb.1 mar.1 jul.1 nov.1
## 0.416 -0.111 0.668 0.247 -0.195 -0.151
## recency.0 recency.3 recency.4 recency.5 recency.6 recency.8
## -0.763 -0.191 0.140 -0.187 -0.116 0.807
## recency.9 recency.11 recency.12 recency.14 recency.15 recency.16
## 0.430 -0.121 -0.126 -0.222 -0.326 -0.147
## recency.17 recency.18 recency.19 recency.20 recency.21
## 0.251 0.272 0.340 0.839 0.482
################################
##Plot the recency abnormality##
################################
#since the recency is fluturated, I plot it
recency_abnorm <- data.frame(time=seq(0,21),abnorm=(unlist(compare_rate['recency'])-1)*100,row.names = NULL)
ggplot(data=recency_abnorm,aes(time,abnorm))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\nthis segment should be', title='recency fluturation')#+geom_text(aes(label = round(abnorm, 2)),vjust = "inward", hjust = "inward",col='firebrick3',show.legend = FALSE)

################################
cat('Mean of Seg7 / Mean of total:\n')
## Mean of Seg7 / Mean of total:
mean_table <- round(colMeans(seg7)/colMeans(myData),digit=3)[-46]
print(mean_table)
## spend_per_txn spend_per_item jan
## 0.985 1.019 1.416
## feb mar apr
## 1.668 1.247 0.967
## may jun jul
## 0.965 1.073 0.805
## aug sep oct
## 0.924 0.998 0.920
## nov dec total_spend
## 0.849 0.926 1.058
## mens_dept_spend womens_dept_spend kids_dept_spend
## 0.933 1.026 1.171
## athletic_dept_spend accessories_spend recency
## 1.096 1.044 1.101
## response total_txns total_items
## 1.105 1.070 1.048
## unique_sizes unique_depts internet_spend
## 1.044 1.037 0.288
## tenure retained_flag retained_spend
## 1.136 1.025 1.060
## cmpns pct_response ever_responded
## 1.098 1.069 1.093
## opens clicks hhincome
## 1.002 0.983 1.010
## hhage hhwom hhmen
## 1.009 1.021 1.009
## hhkids man_dept_buy womens_dept_buy
## 1.006 0.964 1.061
## kids_dept_buy athletic_dept_buy accessories_dept_buy
## 1.054 1.050 1.058
cat('filtered, +/- 10%: \n')
## filtered, +/- 10%:
filter_idx <- mean_table>1.1|mean_table<0.9
mean_table[filter_idx]-1
## jan feb mar jul
## 0.416 0.668 0.247 -0.195
## nov kids_dept_spend recency response
## -0.151 0.171 0.101 0.105
## internet_spend tenure
## -0.712 0.136
##############################
##Plot the month fluturation##
##############################
#since the purchase month is fluturated, I plot it
month_mean <- data.frame(
month=seq(1,12),
value=mean_table[c(seq(3,9),27,seq(10,13))],row.names = NULL)
ggplot(data=month_mean,aes(month,(value-1)*100))+
geom_line(col="royalblue3")+
geom_hline(yintercept=0,col='firebrick3')+
geom_point(col="royalblue3",alpha=0.75)+
labs(y='+- % than\n mean of total', title='Purchase happening in month')+
scale_x_continuous(breaks=seq(1,12))

Categorical Management
idx <- colnames(summary_mean)[c(seq(16,20),seq(41,45))]
category_management <- tibble::rownames_to_column(summary_mean[idx], "segment")
category_management$segment <- as.factor(category_management$segment)
##change the segment's name, if you already have
levels(category_management$segment) <- c('Summer Fling','Mad Christmas','Daddy Daycare','Hood Rich','Coal for Christmas','Personal Shopper','Be My Valentine')
title <- paste(c('MEN','WOMEN','KIDS','ATHLETIC','ACCESSORIES'),"department categorical management",sep=" ")
#####
for (i in seq(2,length(title)+1)){
idx_x=colnames(category_management)[i]
idx_y=colnames(category_management)[i+length(title)]
x=unlist(category_management[,idx_x])
y=unlist(category_management[,idx_y])*100
x_left_angle=min(x)-2
x_right_angle=max(x)+2
y_bot_angle=min(y)-2
y_top_angle=max(y)+2
print(
ggplot(category_management,aes(x,y,col=category_management$segment))+
geom_point(size=5, alpha=0.8)+
annotate("label", fill="grey", x =x_left_angle , y = y_bot_angle, label = " Convenience", alpha = 0.3)+
annotate("label", fill="grey", x =x_left_angle , y = y_top_angle, label = " Occasion", alpha = 0.3)+
annotate("label", fill="grey", x =x_right_angle , y = y_top_angle, label = "Destination ", alpha = 0.3)+
annotate("label", fill="grey", x =x_right_angle , y = y_bot_angle, label = "Routine ", alpha = 0.3)+
geom_hline(yintercept = (max(y)+min(y))/2, col='firebrick3',size=1)+
geom_vline(xintercept = (max(x)+min(x))/2, col='firebrick3',size=1)+
labs(x="average number of item purchase",y="% of customers in segment purchase", title=title[i-1])+
theme(legend.title = element_blank())
)
}





Total_boxplot
mynamestheme <- theme(plot.title = element_text(family = "Helvetica", face = "bold", size = (15)),
axis.title = element_text(family = "Helvetica", size = (5), colour = "steelblue4"),
axis.text = element_text(family = "Courier", colour = "cornflowerblue", size = (15)),
legend.position = "none")
boxplot_var <- c("spend_per_txn","spend_per_item","total_spend", "mens_dept_spend", "womens_dept_spend","kids_dept_spend","athletic_dept_spend","accessories_spend","response", "total_txns", "total_items", "unique_sizes", "unique_depts", "internet_spend", "tenure","retained_spend","cmpns","pct_response", "opens", "clicks","hhincome","hhage","hhwom","hhmen","hhkids")
for (i in boxplot_var){
idx=myData[,i]!=0
temp=myData[idx,c("segment",i)]
x=as.factor(unlist(temp[1]))
y=unlist(temp[2])
print(ggplot(temp,aes(x,y,col=x,na.rm = TRUE))+
geom_boxplot(na.rm = TRUE)+labs(x='segment', title=i, y="")+mynamestheme)
}
























